import jieba
import logging
from wordcloud import WordCloud

def get_text(file: str):
    """
    Load a UTF-8 text file and flatten it onto a single line.

    :param file: path of the file to read
    :return: the file's text with every newline replaced by a space
    """
    with open(file, "r", encoding="utf-8") as source:
        raw = source.read()
    # Splitting on "\n" and re-joining with " " turns each newline into a space.
    return " ".join(raw.split("\n"))


def clean_text(content: str, stop_words=("有限", "责任", "公司")):
    """
    Remove every occurrence of the given substrings from the text.

    :param content: the text to clean
    :param stop_words: substrings to delete, applied one after another in
        iteration order; defaults to "有限", "责任", "公司" (the parts of
        "limited liability company"). A single string is treated as one
        substring rather than as a sequence of characters.
    :return: the text with all of those substrings removed
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(stop_words, str):
        stop_words = (stop_words,)
    for word in stop_words:
        content = content.replace(word, "")
    return content


def segment_words(content: str):
    """
    Segment a string into words with jieba.

    :param content: the text to segment
    :return: a single string in which the tokens produced by ``jieba.cut``
        are separated by single spaces
    """
    # Set jieba's log level to INFO before cutting.
    jieba.setLogLevel(log_level=logging.INFO)
    return " ".join(token for token in jieba.cut(content))


def generate_wordcloud(
    words: str,
    font_path: str = "../wordcloud_tutorial/fonts/msyhl.ttc",
    output_file: str = "陕西公司名录.jpeg",
    width: int = 800,
    height: int = 600,
):
    """
    Render a word cloud from the given text and save it as an image file.

    The font, output file and canvas size used to be hard-coded; they are now
    optional parameters whose defaults are the original values, so existing
    calls that pass only ``words`` behave exactly as before.

    :param words: the text handed to ``WordCloud.generate`` (the caller in
        this file passes space-separated tokens)
    :param font_path: font file used for rendering; the default is a relative
        path, so it is resolved against the current working directory
    :param output_file: path of the image file to write
    :param width: canvas width passed to ``WordCloud``
    :param height: canvas height passed to ``WordCloud``
    :return: None
    """
    picture = WordCloud(font_path=font_path, width=width, height=height)
    picture.generate(words)
    picture.to_file(output_file)


if __name__ == "__main__":
    # Pipeline: read the name list, strip the unwanted words, segment the
    # remainder, then render the word cloud.
    raw_text = get_text("namelist.txt")
    cleaned_text = clean_text(raw_text)
    generate_wordcloud(segment_words(cleaned_text))
